clear

*** Competition-level lookup file
* Load only the competition attributes needed downstream, coerce the id to
* numeric (non-numeric ids become missing under -force-), discard rows whose
* id is missing, and store the result as competitioninfo.dta.
local compvars competitionid evaluationalgorithmismax hostsegmenttitle hasleaderboard maxteamsize banteammergers enableteammodels rewardtype rewardquantity numprizes totalteams totalcompetitors totalsubmissions maxdailysubmissions teammergerdeadlinedate
use `compvars' using competitions
destring competitionid, replace force
keep if competitionid != .
save competitioninfo, replace

*** Combine teams with teammergers, submissions, teamswitch and competitioninfo
use teams

* Team-level joins on teamid; observations without a match on either side are kept.
* -tab _merge- reports how many rows matched before the indicator is discarded.
foreach src in teammergers submissions {
    joinby teamid using `src', unmatched(both)
    tab _merge
    drop _merge
}

* Team-switch information by competition and submitting user;
* only unmatched rows from the data in memory are retained here.
joinby competitionid submitteduserid using teamswitch, unmatched(master)
tab _merge
drop _merge

* Competition attributes saved earlier in competitioninfo.dta
joinby competitionid using competitioninfo, unmatched(both)
tab _merge
drop _merge

* Restrict the sample to featured competitions
drop if hostsegmenttitle != "Featured"

* Convert string dates to numeric Stata daily dates
* Each string variable is replaced by a numeric variable of the same name.
* The merger deadline string also carries a time of day, hence its longer mask
* (NOTE(review): date() with an "MDYhms" mask is expected to return the day only -- confirm).
local day_only requestdate1 requestdate2 requestdate3 requestdate4 requestdate5 submissiondate newteam_date
foreach v in `day_only' teammergerdeadlinedate {
    local mask = cond("`v'" == "teammergerdeadlinedate", "MDYhms", "MDY")
    gen `v'_td = date(`v', "`mask'")
    drop `v'
    rename `v'_td `v'
}

* Merger date -- defined only for two-member teams, the sample used in the rest
* of the analysis: the later of the first two request dates. All other teams
* are left missing.
gen merger_time = max(requestdate1, requestdate2) if teamsize == 2

/*
*Interim team size - i.e., how the team size changes over time
gen teamsize_interim = teamsize
replace teamsize_interim = (submissiondate>requestdate1) + (submissiondate>requestdate2) if teamsize==2
replace teamsize_interim = (submissiondate>requestdate1) + (submissiondate>requestdate2) + (submissiondate>requestdate3) if teamsize==3
replace teamsize_interim = (submissiondate>requestdate1) + (submissiondate>requestdate2) + (submissiondate>requestdate3) + (submissiondate>requestdate4) if teamsize==4
replace teamsize_interim = (submissiondate>requestdate1) + (submissiondate>requestdate2) + (submissiondate>requestdate3) + (submissiondate>requestdate4) +(submissiondate>requestdate5) if teamsize>4 & teamsize~=.
replace teamsize_interim = teamsize if teamsize_interim==0 
gen newteam_submission = 0
replace newteam_submission = 1 if newteam_submission~=. & submissiondate>newteam_date
sum newteam_submission
*/

*Standardize scores
* Two-pass within-competition z-scores with outlier trimming:
*   pass 1: standardize using all scores, then blank out (in the _aux copies)
*           any score more than 10 SDs from its competition mean;
*   pass 2: recompute mean AND sd from the trimmed _aux copies and standardize
*           the original scores with those trimmed moments.
* Fix: the sd was previously computed from the untrimmed scores, so pass 2
* mixed a trimmed mean with an outlier-inflated sd.
* Output: pubscore_normal and priscore_normal (defined for every non-missing
* score, including the trimmed outliers themselves).
gen publicscorefullprecision_aux = publicscorefullprecision
gen privatescorefullprecision_aux = privatescorefullprecision
forvalues pass = 1/2 {
    bys competitionid: egen meanpub = mean(publicscorefullprecision_aux)
    bys competitionid: egen meanpri = mean(privatescorefullprecision_aux)
    bys competitionid: egen sdpub = sd(publicscorefullprecision_aux)
    bys competitionid: egen sdpri = sd(privatescorefullprecision_aux)
    gen pubscore_normal = (publicscorefullprecision - meanpub)/sdpub
    gen priscore_normal = (privatescorefullprecision - meanpri)/sdpri
    drop meanpub meanpri sdpub sdpri
    if `pass' < 2 {
        * Missing z-scores compare as larger than 10 in Stata, so they are
        * blanked here too (same as the original behaviour).
        replace publicscorefullprecision_aux = . if abs(pubscore_normal) > 10
        replace privatescorefullprecision_aux = . if abs(priscore_normal) > 10
        * First-pass z-scores only serve to flag outliers
        drop pubscore_normal priscore_normal
    }
}
drop publicscorefullprecision_aux privatescorefullprecision_aux

* Orient scores so that larger is always better:
* flip the sign wherever the competition's metric is not flagged as "max".
foreach z of varlist pubscore_normal priscore_normal {
    replace `z' = -`z' if evaluationalgorithmismax == "False"
}

* Write out the assembled analysis file
save combined_dataset_2024, replace


